#!/bin/bash

#  Copyright (C) 2018 Oracle.  All Rights Reserved.
#
#  Author: Darrick J. Wong <darrick.wong@oracle.com>
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it would be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write the Free Software Foundation,
#  Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.

# Automatically check an LVM-managed filesystem online.
# We use lvm snapshots to do this, which means that we can only
# check filesystems in VGs that have at least 256MB (or so) of
# free space.

PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

if (( $EUID != 0 )); then
    echo "e2scrub must be run as root"
    exit 1
fi

snap_size_mb=256
fstrim=0
reap=0
e2fsck_opts=""
conffile="@root_sysconfdir@/e2scrub.conf"

test -f "${conffile}" && . "${conffile}"

print_help() {
	echo "Usage: $0 [OPTIONS] mountpoint | device"
	echo
	echo "mountpoint must be on an LVM-managed block device"
	echo "-n: Show what commands e2scrub would execute."
	echo "-r: Remove e2scrub snapshot and exit, do not check anything."
	echo "-t: Run fstrim if successful."
	echo "-V: Print version information and exit."
}

print_version() {
	echo "e2scrub @E2FSPROGS_VERSION@ (@E2FSPROGS_DATE@)"
}

exitcode() {
	ret="$1"

	# If we're being run as a service, the return code must fit the LSB
	# init script action error guidelines, which is to say that we
	# compress all errors to 1 ("generic or unspecified error", LSB 5.0
	# section 22.2) and hope the admin will scan the log for what
	# actually happened.

	# We have to sleep 2 seconds here because journald uses the pid to
	# connect our log messages to the systemd service.  This is critical
	# for capturing all the log messages if the scrub fails, because the
	# fail service uses the service name to gather log messages for the
	# error report.
	if [ -n "${SERVICE_MODE}" -a "${ret}" -ne 0 ]; then
		test "${ret}" -ne 0 && ret=1
		sleep 2
	fi

	exit "${ret}"
}

while getopts "nrtV" opt; do
    case "${opt}" in
	"n") DBG="echo Would execute: " ;;
	"r") reap=1;;
	"t") fstrim=1;;
	"V") print_version; exitcode 0;;
	*) print_help; exitcode 2;;
	esac
done
shift "$((OPTIND - 1))"

arg="$1"
if [ -z "${arg}" ]; then
	print_help
	exitcode 1
fi

if ! type lsblk >& /dev/null ; then
    echo "e2scrub: can't find lsblk --- is util-linux installed?"
    exitcode 1
fi

if ! type lvcreate >& /dev/null ; then
    echo "e2scrub: can't find lvcreate --- is lvm2 installed?"
    exitcode 1
fi

# close file descriptor 3 (from cron) since it causes lvm to kvetch
exec 3<&-

# Find the device for a given mountpoint
dev_from_mount() {
	local mountpt="$(realpath "$1")"

	lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n 2> /dev/null | while read vars; do
		eval "${vars}"
		if [ "${mountpt}" != "${MOUNTPOINT}" ]; then
			continue
		fi
		case "${FSTYPE}" in
		ext[234])
			echo "${NAME}"
			return 0
			;;
		esac
	done
	return 1
}

# Check a device argument
dev_from_arg() {
	local dev="$1"
	local fstype="$(lsblk -o FSTYPE -n "${dev}" 2> /dev/null)"

	case "${fstype}" in
	ext[234])
		echo "${dev}"
		return 0
		;;
	esac
	return 1
}

mnt_from_dev() {
	local dev="$1"

	if [ -n "${dev}" ]; then
		lsblk -o MOUNTPOINT -n "${dev}"
	fi
}

# Construct block device path and mountpoint from argument
if [ -b "${arg}" ]; then
	dev="$(dev_from_arg "${arg}")"
	mnt="$(mnt_from_dev "${dev}")"
else
	dev="$(dev_from_mount "${arg}")"
	mnt="${arg}"
fi
if [ ! -e "${dev}" ]; then
	echo "${arg}: Not an ext[234] filesystem."
	print_help
	exitcode 16
fi

# Do not scrub unjournalled filesystems; they are inconsistent when mounted
if [ "${reap}" -eq 0 ] && ! dumpe2fs -h "${dev}" | grep -q 'has_journal'; then
	echo "${arg}: Filesystem has no journal, cannot check."
	print_help
	exitcode 16
fi

# Make sure this is an LVM device we can snapshot
lvm_vars="$(lvs --nameprefixes -o name,vgname,lv_role --noheadings "${dev}" 2> /dev/null)"
eval "${lvm_vars}"
if [ -z "${LVM2_VG_NAME}" ] || [ -z "${LVM2_LV_NAME}" ] ||
   echo "${LVM2_LV_ROLE}" | grep -q "snapshot"; then
	echo "${arg}: Not connected to an LVM logical volume."
	print_help
	exitcode 16
fi
start_time="$(date +'%Y%m%d%H%M%S')"
snap="${LVM2_LV_NAME}.e2scrub"
snap_dev="/dev/${LVM2_VG_NAME}/${snap}"

teardown() {
	# Remove and wait for removal to succeed.
	${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}"
	while [ "$?" -eq "5" ] && [ -e "${snap_dev}" ]; do
		sleep 0.5
		${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}"
	done
}

check() {
	# First we recover the journal, then we see if e2fsck tries any
	# non-optimization repairs.  If either of these two returns a
	# non-zero status (errors fixed or remaining) then this fs is bad.
	E2FSCK_FIXES_ONLY=1
	export E2FSCK_FIXES_ONLY
	${DBG} "@root_sbindir@/e2fsck" -E journal_only -p ${e2fsck_opts} "${snap_dev}" || return $?
	${DBG} "@root_sbindir@/e2fsck" -f -y ${e2fsck_opts} "${snap_dev}"
}

mark_clean() {
	${DBG} "@root_sbindir@/tune2fs" -C 0 -T "${start_time}" "${dev}"
}

mark_corrupt() {
	${DBG} "@root_sbindir@/tune2fs" -E force_fsck "${dev}"
}

setup() {
	# Try to remove snapshot for 30s, bail out if we can't remove it.
	lvremove_deadline="$(( $(date "+%s") + 30))"
	${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 2>/dev/null
	while [ "$?" -eq "5" ] && [ -e "${snap_dev}" ] &&
	      [ "$(date "+%s")" -lt "${lvremove_deadline}" ]; do
		sleep 0.5
		${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}"
	done
	if [ -e "${snap_dev}" ]; then
		echo "${arg}: e2scrub snapshot is in use, cannot check!"
		return 1
	fi
	# Create the snapshot, wait for device to appear.
	${DBG} lvcreate -s -L "${snap_size_mb}m" -n "${snap}" "${LVM2_VG_NAME}/${LVM2_LV_NAME}"
	if [ $? -ne 0 ]; then
		echo "${arg}: e2scrub snapshot FAILED, will not check!"
		return 1
	fi
	${DBG} udevadm settle 2> /dev/null
	return 0
}

if [ "${reap}" -gt 0 ]; then
	if [ -e "${snap_dev}" ]; then
		teardown 2> /dev/null
	fi
	exit 0
fi
if ! setup; then
	exitcode 8
fi
trap "teardown; exit 1" EXIT INT QUIT TERM

# Check and react
check
case "$?" in
"0")
	# Clean check!
	echo "${arg}: Scrub succeeded."
	mark_clean
	teardown
	trap '' EXIT

	# Trim the free space, which requires the snapshot be deleted.
	if [ "${fstrim}" -eq 1 ] && [ -d "${mnt}" ] && type fstrim > /dev/null 2>&1; then
		echo "${arg}: Trimming free space."
		fstrim -v "${mnt}"
	fi

	ret=0
	;;
"8")
	# Operational error, what now?
	echo "${arg}: e2fsck operational error."
	teardown
	trap '' EXIT
	ret=8
	;;
*)
	# fsck failed.  Check if the snapshot is invalid; if so, make a
	# note of that at the end of the log.  This isn't necessarily a
	# failure because the mounted fs could have overflowed the
	# snapshot with regular disk writes /or/ our repair process
	# could have done it by repairing too much.
	#
	# If it's really corrupt we ought to fsck at next boot.
	is_invalid="$(lvs -o lv_snapshot_invalid --noheadings "${snap_dev}" | awk '{print $1}')"
	if [ -n "${is_invalid}" ]; then
		echo "${arg}: Scrub FAILED due to invalid snapshot."
		ret=8
	else
		echo "${arg}: Scrub FAILED due to corruption!  Unmount and run e2fsck -y."
		mark_corrupt
		ret=6
	fi
	teardown
	trap '' EXIT
	;;
esac

exitcode "${ret}"
